Import Libraries

In [1]:
In [2]:

Data preprocessing

In [3]:
Out[3]:
match_no day_of_week date hour venue referee group 1 2 attendance ... 1_panelties_scored 2_panelties_scored 1_goal_prevented 2_goal_prevented 1_own_goal 2_own_goal 1_forced_turnovers 2_forced_turnovers 1_defensive_pressure_applied 2_defensive_pressure_applied
0 1 Sun 20-Nov-22 17:00 Al Bayt Stadium Daniele Orsato Group A QATAR ECUADOR 67372 ... 0 1 6 5 0 0 52 72 256 279
1 2 Mon 21-Nov-22 14:00 Khalifa International Stadium Raphael Claus Group B ENGLAND IRAN 45334 ... 0 1 8 13 0 0 63 72 139 416
2 3 Mon 21-Nov-22 17:00 Al Thumama Stadium Wilton Sampaio Group A SENEGAL NETHERLANDS 41721 ... 0 0 9 15 0 0 63 73 263 251

3 rows × 59 columns

In [4]:
Out[4]:
(64, 59)
In [5]:
In [6]:
In [7]:
In [8]:
Out[8]:
Index(['match_no', 'day_of_week', 'date', 'hour', 'venue', 'referee', 'group',
       'home_team', 'away_team', 'attendance', 'homeXG', 'awayXG', 'home_poss',
       'away_poss', 'home_goals', 'away_goals', 'score', 'home_attempts',
       'away_attempts', 'home_conceded', 'away_conceded', 'homeG_inside',
       'awayG_inside', 'homeG_outside', 'awayG_outside', 'home_ontarget',
       'away_ontarget', 'home_offtarget', 'away_offtarget',
       '1_attempts_inside_penalty_area', '2_attempts_inside_penalty_area',
       '1_attempts_outside_penalty_area', '2_attempts_outside_penalty_area',
       'home_yellow_cards', 'away_yellow_cards', 'home_RedCards',
       'away_RedCards', 'home_fauls', 'away_fauls', '1_offsides', '2_offsides',
       'home_passes', 'away_passes', '1_passes_compeletd',
       '2_passes_compeletd', 'home_corners', 'away_corners', 'home_free_kicks',
       'away_free_kicks', 'homeg_panelties', 'awayg_panelties',
       'home_goal_prevented', 'away_goal_prevented', 'home_owngoals',
       'away_owngoals', 'home_forced_turnovers', 'away_forced_turnovers',
       'home_pressure', 'away_pressure'],
      dtype='object')
In [9]:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64 entries, 0 to 63
Data columns (total 59 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   match_no                         64 non-null     int64  
 1   day_of_week                      64 non-null     object 
 2   date                             64 non-null     object 
 3   hour                             64 non-null     object 
 4   venue                            64 non-null     object 
 5   referee                          64 non-null     object 
 6   group                            64 non-null     object 
 7   home_team                        64 non-null     object 
 8   away_team                        64 non-null     object 
 9   attendance                       64 non-null     int64  
 10  homeXG                           64 non-null     float64
 11  awayXG                           64 non-null     float64
 12  home_poss                        64 non-null     int64  
 13  away_poss                        64 non-null     int64  
 14  home_goals                       64 non-null     int64  
 15  away_goals                       64 non-null     int64  
 16  score                            64 non-null     object 
 17  home_attempts                    64 non-null     int64  
 18  away_attempts                    64 non-null     int64  
 19  home_conceded                    64 non-null     int64  
 20  away_conceded                    64 non-null     int64  
 21  homeG_inside                     64 non-null     int64  
 22  awayG_inside                     64 non-null     int64  
 23  homeG_outside                    64 non-null     int64  
 24  awayG_outside                    64 non-null     int64  
 25  home_ontarget                    64 non-null     int64  
 26  away_ontarget                    64 non-null     int64  
 27  home_offtarget                   64 non-null     int64  
 28  away_offtarget                   64 non-null     int64  
 29  1_attempts_inside_penalty_area   64 non-null     int64  
 30  2_attempts_inside_penalty_area   64 non-null     int64  
 31  1_attempts_outside_penalty_area  64 non-null     int64  
 32  2_attempts_outside_penalty_area  64 non-null     int64  
 33  home_yellow_cards                64 non-null     int64  
 34  away_yellow_cards                64 non-null     int64  
 35  home_RedCards                    64 non-null     int64  
 36  away_RedCards                    64 non-null     int64  
 37  home_fauls                       64 non-null     int64  
 38  away_fauls                       64 non-null     int64  
 39  1_offsides                       64 non-null     int64  
 40  2_offsides                       64 non-null     int64  
 41  home_passes                      64 non-null     int64  
 42  away_passes                      64 non-null     int64  
 43  1_passes_compeletd               64 non-null     int64  
 44  2_passes_compeletd               64 non-null     int64  
 45  home_corners                     64 non-null     int64  
 46  away_corners                     64 non-null     int64  
 47  home_free_kicks                  64 non-null     int64  
 48  away_free_kicks                  64 non-null     int64  
 49  homeg_panelties                  64 non-null     int64  
 50  awayg_panelties                  64 non-null     int64  
 51  home_goal_prevented              64 non-null     int64  
 52  away_goal_prevented              64 non-null     int64  
 53  home_owngoals                    64 non-null     int64  
 54  away_owngoals                    64 non-null     int64  
 55  home_forced_turnovers            64 non-null     int64  
 56  away_forced_turnovers            64 non-null     int64  
 57  home_pressure                    64 non-null     int64  
 58  away_pressure                    64 non-null     int64  
dtypes: float64(2), int64(48), object(9)
memory usage: 29.6+ KB
In [10]:
In [11]:
Out[11]:
date    datetime64[ns]
hour             int64
dtype: object

handle players_data

In [12]:
Out[12]:
0    30-067
1    32-094
Name: age, dtype: object
In [13]:
Out[13]:
player                  object
position                object
team                    object
age                     object
birth_year               int64
games                    int64
minutes                float64
minutes_per_game       float64
minutes_pct            float64
minutes_90s            float64
games_starts             int64
minutes_per_start      float64
games_complete           int64
games_subs               int64
minutes_per_sub        float64
unused_subs              int64
points_per_game        float64
on_goals_for           float64
on_goals_against       float64
plus_minus             float64
plus_minus_per90       float64
plus_minus_wowy        float64
on_xg_for              float64
on_xg_against          float64
xg_plus_minus          float64
xg_plus_minus_per90    float64
xg_plus_minus_wowy     float64
dtype: object
In [14]:
Out[14]:
Index(['player', 'position', 'team', 'age', 'birth_year', 'games', 'minutes',
       'minutes_per_game', 'minutes_pct', 'minutes_90s', 'games_starts',
       'minutes_per_start', 'games_complete', 'games_subs', 'minutes_per_sub',
       'unused_subs', 'points_per_game', 'on_goals_for', 'on_goals_against',
       'plus_minus', 'plus_minus_per90', 'plus_minus_wowy', 'on_xg_for',
       'on_xg_against', 'xg_plus_minus', 'xg_plus_minus_per90',
       'xg_plus_minus_wowy'],
      dtype='object')
In [ ]:
In [15]:
In [16]:
In [17]:
In [18]:
28 23 22 25 31
In [19]:
In [ ]:

Visualization

  1. Histogram
  2. Barplot
  3. Piechart
  4. TreeMap
  5. Heatmap
  6. ScatterPlot

Histogram

  • Used to show the distribution of numerical attributes
In [20]:
In [21]:
0123456780100200300400500
teamAustraliaWalesMoroccoQatarSenegalGhanaSaudi ArabiaIR IranFranceUruguayTunisiaEcuadorSpainSerbiaBrazilDenmarkArgentinaMexicoCanadaBelgiumCameroonCroatiaNetherlandsPortugalCosta RicaUnited StatesGermanyJapanSwitzerlandPolandEnglandKorea Republiccount of goals by teamsgoalscount
In [22]:
In [23]:
Abdelhamid SabiriAzzedine OunahiBilal El KhannousIlias ChairSelim AmallahSofyan AmrabatYahya JabraneAbderrazak HamdallahAbdessamad EzzalzouliAnass ZarouryHakim ZiyechSofiane BoufalWalid CheddiraYoussef En-NesyriZakaria AboukhlalAchraf DariAchraf HakimiBadr BanounJawad El YamiqNayef AguerdNoussair MazraouiRomain SaïssYahya Attiat AllahMunir02468101214
positionMFFWDFGKplayersum of aerials_lost
In [24]:
Out[24]:
<AxesSubplot:xlabel='home_goals', ylabel='Count'>

BarPlot

This barplot shows every team broken down by player position; the same four positions are found in every team.

In [25]:
AustraliaWalesMoroccoQatarSaudi ArabiaFranceUruguayIR IranEcuadorArgentinaCanadaBelgiumCameroonGhanaMexicoTunisiaJapanPortugalCosta RicaUnited StatesBrazilSpainSenegalDenmarkPolandNetherlandsEnglandSwitzerlandSerbiaKorea RepublicGermanyCroatia0246810
positionMFDFFWGKGroup Players by team and Postionteamcount
In [26]:
Out[26]:
group goals_total
0 Final 6
1 Group A 15
2 Group B 16
3 Group C 12
4 Group D 11
5 Group E 22
6 Group F 11
7 Group G 16
8 Group H 17
9 Play-off for third place 3
10 Quarter-final 10
11 Round of 16 28
12 Semi-Final 5
In [27]:
FinalGroup AGroup BGroup CGroup DGroup EGroup FGroup GGroup HPlay-off for third placeQuarter-finalRound of 16Semi-Final0510152025
Total Goals Scored in each roundgroupgoals_total

Semi-final match representation

In [28]:
Out[28]:
home_team away_team home_goals away_goals home_poss away_poss
60 ARGENTINA CROATIA 3.0 0.0 40.0 60.0
61 FRANCE MOROCCO 2.0 0.0 39.0 61.0
In [29]:
In [30]:
010203040ARGENTINAFRANCE0204060CROATIAMOROCCO
trace 0trace 1
In [31]:
In [32]:
47774658696160623674020406080QATARENGLANDSENEGALUNITED STATESARGENTINADENMARKMEXICOFRANCEMOROCCOGERMANY532354423139403864260204060ECUADORIRANNETHERLANDSWALESSAUDI ARABIATUNISIAPOLANDAUSTRALIACROATIAJAPAN
Home_teamAway_team
In [33]:
In [34]:
ARGENTINABELGIUMCAMEROONCOSTA RICADENMARKENGLANDGERMANYIRANKOREA REPUBLICMOROCCOPOLANDQATARSENEGALSPAINTUNISIAURUGUAY0510WALESIRANSENEGALMEXICOSWITZERLANDECUADORTUNISIAUNITED STATESAUSTRALIASERBIAPOLANDKOREA REPUBLICCROATIANETHERLANDSENGLANDPORTUGAL0510Abdelhamid SabiriAbdessamad EzzalzouliAchraf HakimiAzzedine OunahiBilal El KhannousIlias ChairMunirNoussair MazraouiSelim AmallahSofyan AmrabatYahya Attiat AllahYassine BounouZakaria Aboukhlal0510
un sortedsortedNumber of losses aerlies
In [35]:
In [76]:
In [77]:
111111112222222334444444556677777GERMANYECUADORBRAZILURUGUAYCOSTA RICASPAINENGLANDSWITZERLANDMEXICOGHANAFRANCESENEGALAUSTRALIADENMARKIRANWALESCROATIACANADAUNITED STATESKOREA REPUBLICBELGIUMTUNISIAPOLANDMOROCCOPORTUGALJAPANSAUDI ARABIAQATARNETHERLANDSCAMEROONSERBIAARGENTINA01234567
colshome_RedCardshome_yellow_cardsnumber of yellow and red cardshome_teamvals

Scatter plot

In [38]:
In [39]:
Out[39]:
venue match_no attendance
49 Lusail Iconic Stadium 24 88966
55 Lusail Iconic Stadium 61 88966
56 Lusail Iconic Stadium 64 88966
50 Lusail Iconic Stadium 32 88668
54 Lusail Iconic Stadium 58 88235
... ... ... ...
1 Ahmed bin Ali Stadium 12 40432
24 Al Thumama Stadium 11 40013
19 Al Janoub Stadium 29 39789
57 Stadium 974 7 39369
17 Al Janoub Stadium 13 39089

64 rows × 3 columns

In [40]:
In [41]:
010203040506040k50k60k70k80k90k
colorLusail Iconic StadiumAl Bayt StadiumKhalifa International StadiumAhmed bin Ali StadiumEducation City StadiumAl Thumama StadiumStadium 974Al Janoub Stadiumxy
In [42]:
In [43]:
In [44]:
051015202530024681012
correlation between #attempts and #balls on targetxy
In [45]:
Abdelhamid SabiriAzzedine OunahiBilal El KhannousIlias ChairSelim AmallahSofyan AmrabatYahya JabraneAbderrazak HamdallahAbdessamad EzzalzouliAnass ZarouryHakim ZiyechSofiane BoufalWalid CheddiraYoussef En-NesyriZakaria AboukhlalAchraf DariAchraf HakimiBadr BanounJawad El YamiqNayef AguerdNoussair MazraouiRomain SaïssYahya Attiat AllahMunirYassine Bounou051015051015051015051015
positionMFFWDFGKNumber of Aerlis lost by morrocplayeraerials_lostaerials_lostaerials_lostaerials_lostposition=GKposition=DFposition=FWposition=MF

Line Plot

Number of shots trace for the two teams in the final

In [46]:
In [47]:
In [48]:
C:\Users\HP\Anaconda3\lib\site-packages\plotly\graph_objs\_deprecations.py:378: DeprecationWarning:

plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.


Alexis Mac AllisterCristian RomeroEmiliano MartínezEnzo FernándezExequiel PalaciosGermán PezzellaGonzalo MontielGuido RodríguezJuan FoythJulián ÁlvarezLautaro MartínezLeandro ParedesLionel MessiLisandro MartínezMarcos AcuñaNahuel MolinaNicolás OtamendiNicolás TagliaficoPapu GómezPaulo DybalaRodrigo De PaulThiago AlmadaÁngel CorreaÁngel Di María0510152025Adrien RabiotAntoine GriezmannAurélien TchouaméniAxel DisasiBenjamin PavardDayot UpamecanoEduardo CamavingaHugo LlorisIbrahima KonatéJordan VeretoutJules KoundéKingsley ComanKylian MbappéLucas HernándezMarcus ThuramMattéo GuendouziOlivier GiroudOusmane DembéléRandal Kolo MuaniRaphaël VaraneSteve MandandaTheo HernándezWilliam SalibaYoussouf Fofana051015202530
Argentina TraceFrance Trace

Dual-axis line plot to see the changes in home_goals and away_goals

In [49]:
In [50]:

Some data preparation

In [51]:
In [52]:
In [53]:
In [54]:
In [55]:
In [56]:
In [57]:

Cards received by every team

In [58]:
Out[58]:
home_team 0
0 ARGENTINA 15
1 AUSTRALIA 4
2 BELGIUM 1
3 BRAZIL 8
4 CAMEROON 4
5 CANADA 2
6 COSTA RICA 3
7 CROATIA 8
8 DENMARK 1
9 ECUADOR 4
10 ENGLAND 13
11 FRANCE 16
12 GERMANY 6
13 GHANA 5
14 IRAN 4
15 JAPAN 5
16 KOREA REPUBLIC 5
17 MEXICO 2
18 MOROCCO 6
19 NETHERLANDS 10
20 POLAND 3
21 PORTUGAL 12
22 QATAR 1
23 SAUDI ARABIA 3
24 SENEGAL 5
25 SERBIA 5
26 SPAIN 9
27 SWITZERLAND 5
28 TUNISIA 1
29 UNITED STATES 3
30 URUGUAY 2
31 WALES 1

Boxplot

In [59]:
In [60]:
0510152025
teamTunisiaIR IranBrazilNetherlandsCameroonWalesPortugalCroatiaSenegalArgentinaSwitzerlandMexicoEcuadorFranceEnglandDenmarkCosta RicaKorea RepublicGhanaGermanyAustraliaUnited StatesQatarCanadaSaudi ArabiaMoroccoUruguayJapanBelgiumSpainSerbiaPolandGK_savesgk_saves
In [78]:
C:\Users\HP\Anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning:

Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.

Out[78]:
<AxesSubplot:xlabel='vals'>
In [62]:
Out[62]:
<AxesSubplot:>

Type Markdown and LaTeX: α2

In [63]:
In [64]:
In [65]:

Pie Chart

Visualize the top 10 goal scorers

In [66]:
19.5%17.1%9.76%9.76%7.32%7.32%7.32%7.32%7.32%7.32%
Kylian MbappéLionel MessiJulián ÁlvarezOlivier GiroudGonçalo RamosÁlvaro MorataBukayo SakaMarcus RashfordRicharlisonEnner Valencia

Show the total attendance at each venue

In [67]:
25.7%17.7%10.4%10.3%9.92%8.8%8.75%8.48%
Lusail Iconic StadiumAl Bayt StadiumKhalifa International StadiumEducation City StadiumAl Thumama StadiumAhmed bin Ali StadiumStadium 974Al Janoub StadiumVenue with Total Attendence

TreeMap

In [79]:
C:\Users\HP\Anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

FRANCEARGENTINAENGLANDPORTUGALNETHERLANDSSPAINBRAZILCROATIAGERMANYMOROCCOGHANAJAPANKOREA REPUBLICSENEGALSERBIASWITZERLANDAUSTRALIACAMEROONECUADORIRANCOSTA RICAPOLANDSAUDI ARABIAUNITED STATESCANADAMEXICOURUGUAYBELGIUMDENMARKQATARTUNISIAWALES
Team Goals

From the above plot, we conclude that France is the top-scoring team, with 16 goals.

In [69]:
C:\Users\HP\Anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

C:\Users\HP\Anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

ARGENTINAFRANCEPORTUGALENGLANDNETHERLANDSSPAINBRAZILCROATIACAMEROONKOREA REPUBLICJAPANCOSTA RICAPOLANDSERBIAAUSTRALIABELGIUMCANADAECUADORGERMANYMOROCCOQATARSAUDI ARABIASWITZERLANDTUNISIAUNITED STATES CROATIAFRANCEAUSTRALIAMEXICOSAUDI ARABIAAUSTRALIAPOLANDDENMARKMOROCCOSWITZERLANDGHANAURUGUAYIRANSENEGALFRANCE UNITED STATESARGENTINAQATARECUADORCOSTA RICAGERMANYKOREA REPUBLICSERBIASWITZERLANDCANADAMOROCCOBRAZIL SERBIABRAZILGHANAPORTUGALSPAINCROATIA GERMANYSAUDI ARABIA SWITZERLANDDENMARKCANADA MOROCCOSENEGALJAPANPORTUGAL SENEGAL MEXICOCAMEROONFRANCE WALES

HeatMap

In [70]:
783116112104952003010024680204060
0246810countTotal Number of goalsgoals_totalindex
In [71]:
11111111110000000000000000000000000000000011111111110000000000000000000000000000000011111111110000000000000000000000000000000011ARGENTINAAUSTRALIABELGIUMBRAZILCAMEROONCANADACOSTA RICACROATIADENMARKECUADORENGLANDFRANCEGERMANYGHANAIRANJAPANKOREA REPUBLICMEXICOMOROCCONETHERLANDSPOLANDPORTUGALQATARSAUDI ARABIASENEGALSERBIASPAINSWITZERLANDTUNISIAUNITED STATESURUGUAYWALES0102030
00.20.40.60.81countTotal Number of goalshome_teamindex
In [72]:
In [73]:

Pairplot for home_team and its statistics

In [74]:
In [75]:
Out[75]:
<seaborn.axisgrid.PairGrid at 0x2e10dbece50>
In [ ]: